Single cell clustering algorithm benchmarking using Simulated dataset

1. Read in the results and truth

eda1 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.EDA1.rds")
eda2 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.EDA2.rds")
eda3 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.EDA3.rds")
eda4 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.EDA4.rds")

udb1 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.UDB1.rds")
udb2 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.UDB2.rds")
udb3 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.UDB3.rds")
udb4 <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/res.UDB4.rds")

eda1.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.EDA1.rds")
eda2.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.EDA2.rds")
eda3.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.EDA3.rds")
eda4.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.EDA4.rds")

udb1.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.UDB1.rds")
udb2.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.UDB2.rds")
udb3.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.UDB3.rds")
udb4.tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/tru.UDB4.rds")

Merge the data

res <- c(eda1,eda2,eda3,eda4,udb1,udb2,udb3,udb4)
tru <- c(eda1.tru, eda2.tru, eda3.tru, eda4.tru, udb1.tru, udb2.tru, udb3.tru, udb4.tru)
saveRDS(res,file = "~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/results.rds")
saveRDS(tru,file = "~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/truth.rds")
res <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/results.rds")
tru <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/truth.rds")

##. 2. Calculate clustering comparison metrics. Calculate Adjusted rand index

library(mclust)
## Package 'mclust' version 5.4.2
## Type 'citation("mclust")' for citing this R package in publications.
adj.mat <- matrix(0, nrow = 1024, ncol = 225)
dats <- names(res)
for(i in c(1:length(dats))){
  name = dats[i]
  res.mat <- res[[name]]
  tru.vec <- tru[[name]]
  adj.mat[i,] = apply(res.mat,2,function(x) adjustedRandIndex(x, tru.vec))
}
colnames(adj.mat) <- colnames(res[[1]])
rownames(adj.mat) <- dats
fmeasure <- function(clu1,clu2,label=NULL){
  #Label should be from clu1, exchange clu1 and clu2 when necessary
  if(sum(!is.na(clu1))==0 || sum(!is.na(clu2))==0){
    return(NA)
  }
  contab <- as.matrix(table(factor(clu1),factor(clu2)))
  if(is.null(label)){
    clus <- 1:length(unique(clu1))
  }else{
    clus <- c(label)
  }
  totf = 0;
  for(j in clus){
    maxf = 0;
    for(i in 1:length(unique(clu2))){
      pij = contab[j,i]/sum(contab[,i])
      rij = contab[j,i]/sum(contab[j,])
      fmea = 2*rij*pij/(rij+pij)
      if(!is.na(fmea) & fmea > maxf){
        maxf = fmea
      }
    }
    totf = totf+maxf*sum(contab[j,])
  }
  totf = totf/sum(contab[clus,])
  return(totf)
}
fm.mat <- matrix(0, nrow = 1024, ncol = 225)
dats <- names(res)
for(i in c(1:length(dats))){
  name = dats[i]
  res.mat <- res[[name]]
  tru.vec <- tru[[name]]
  fm.mat[i,] = apply(res.mat,2,function(x) fmeasure(x, tru.vec))
}
colnames(fm.mat) <- colnames(res[[1]])
rownames(fm.mat) <- dats
dats <- names(res)

Read in pre-calculated data.

adj.mat <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/adj.mat.rds")
fm.mat <- readRDS("~/storage/Github/scRNAseqBenchmark_Clustering/evaluation/fm.mat.rds")

##. 3. Overall analysis Extract Dataset features (Equl/Unequal distance, Equal/Unequal Size, etc)

cdist <- unlist(lapply(dats, function(x) strsplit(x,"_")[[1]][1]))
distype <- unlist(lapply(cdist, function(x) substr(x,1,3)))
csize <- unlist(lapply(dats, function(x) strsplit(x,"_")[[1]][2]))
libsize <- unlist(lapply(dats, function(x) strsplit(x,"_")[[1]][3]))
libscale <- unlist(lapply(dats, function(x) strsplit(x,"_")[[1]][4]))
dropout <- unlist(lapply(dats, function(x) strsplit(x,"_")[[1]][5]))
dat.features <- cbind(cdist,csize,libsize,libscale,dropout)
colnames(dat.features) <- c("Cluster_Distance", "Cluster_Size", "Library_Size", "Library_Scale", "Dropout")
rownames(dat.features) <- dats

PCA remove all zero columns and replace NA with 0

adj.mat.fil <- adj.mat[,apply(adj.mat,2,function(x) sum(is.na(x))!=1024)]
adj.mat.fil[is.na(adj.mat.fil)] = 0
adj.pca <- prcomp(adj.mat.fil)
adj.tsne <- Rtsne::Rtsne(adj.mat.fil)
adj.umap <- umap::umap(adj.mat.fil)

plot PCA and colored by each factor

par(mfrow=c(2,3))
plot(adj.pca$x[,c(1:2)], col=as.factor(cdist), pch=16, main="Cluster Distance")
plot(adj.pca$x[,c(1:2)], col=as.factor(distype), pch=16, main="Equal/Unequal Distance")
plot(adj.pca$x[,c(1:2)], col=as.factor(csize), pch=16, main="Equal/Unequal Cluster Size")
plot(adj.pca$x[,c(1:2)], col=as.factor(libsize), pch=16, main="Library Size")
plot(adj.pca$x[,c(1:2)], col=as.factor(libscale), pch=16, main="Library Scale")
plot(adj.pca$x[,c(1:2)], col=as.factor(dropout), pch=16, main="Dropout rate")

plot(adj.pca$x[,c(1:2)], col=c("#9bc0ee","#70a5e7","#4489df","#236fce","#9bc0ee","#70a5e7","#4489df","#236fce")[as.factor(cdist)], pch=16, main="Cluster Distance")
plot(adj.pca$x[,c(1:2)], col=c("#c6c4c4","#7a7676")[as.factor(distype)], pch=16, main="Equal/Unequal Distance")
plot(adj.pca$x[,c(1:2)], col=c("#c0ee9b","#6fce23")[as.factor(csize)], pch=16, main="Equal/Unequal Cluster Size")
plot(adj.pca$x[,c(1:2)], col=c("#eec99b", "#e7b270","#df9a44","#ce8223")[as.factor(libsize)], pch=16, main="Library Size")
plot(adj.pca$x[,c(1:2)], col=c("#c99bee","#b270e7","#9a44df","#8223ce")[as.factor(libscale)], pch=16, main="Library Scale")
plot(adj.pca$x[,c(1:2)], col=c("#eea09b","#e77770","#df4d44","#ce2d23")[as.factor(dropout)], pch=16, main="Dropout rate")

Plot t-SNE and colored by each feature.

par(mfrow=c(2,3))
plot(adj.tsne$Y[,c(1:2)], col=c("#9bc0ee","#70a5e7","#4489df","#236fce","#9bc0ee","#70a5e7","#4489df","#236fce")[as.factor(cdist)], pch=16, main="Cluster Distance")
plot(adj.tsne$Y[,c(1:2)], col=c("#c6c4c4","#7a7676")[as.factor(distype)], pch=16, main="Equal/Unequal cluster distance")
plot(adj.tsne$Y[,c(1:2)], col=c("#c0ee9b","#6fce23")[as.factor(csize)], pch=16, main="Equal/Unequal cluster size")
plot(adj.tsne$Y[,c(1:2)], col=c("#eec99b", "#e7b270","#df9a44","#ce8223")[as.factor(libsize)], pch=16, main="Library Size")
plot(adj.tsne$Y[,c(1:2)], col=c("#c99bee","#b270e7","#9a44df","#8223ce")[as.factor(libscale)], pch=16, main="Library Scale")
plot(adj.tsne$Y[,c(1:2)], col=c("#eea09b","#e77770","#df4d44","#ce2d23")[as.factor(dropout)], pch=16, main="Dropout rate")

Plot UMAP and colored by each factor

par(mfrow=c(2,3))
plot(adj.umap$layout[,c(1:2)], col=c("#9bc0ee","#70a5e7","#4489df","#236fce","#9bc0ee","#70a5e7","#4489df","#236fce")[as.factor(cdist)], pch=16, main="Cluster distance")
plot(adj.umap$layout[,c(1:2)], col=c("#c6c4c4","#7a7676")[as.factor(distype)], pch=16,main="Equal/Unequal Cluster Distance")
plot(adj.umap$layout[,c(1:2)], col=c("#c0ee9b","#6fce23")[as.factor(csize)], pch=16, main="Equal/Unequal Cluster Size")
plot(adj.umap$layout[,c(1:2)], col=c("#eec99b", "#e7b270","#df9a44","#ce8223")[as.factor(libsize)], pch=16, main="Library Size")
plot(adj.umap$layout[,c(1:2)], col=c("#c99bee","#b270e7","#9a44df","#8223ce")[as.factor(libscale)], pch=16, main="Library Scale")
plot(adj.umap$layout[,c(1:2)], col=c("#eea09b","#e77770","#df4d44","#ce2d23")[as.factor(dropout)], pch=16, main="Dropout")

Do PCA, UMAP and TSNE for f-measure result

fm.mat.fil <- fm.mat[,apply(fm.mat,2,function(x) sum(is.na(x))!=1024)]
fm.mat.fil[is.na(fm.mat.fil)] = 0
fm.pca <- prcomp(fm.mat.fil)
fm.tsne <- Rtsne::Rtsne(fm.mat.fil)
fm.umap <- umap::umap(fm.mat.fil)
par(mfrow=c(2,3))
plot(fm.pca$x[,c(1:2)], col=c("#9bc0ee","#70a5e7","#4489df","#236fce","#9bc0ee","#70a5e7","#4489df","#236fce")[as.factor(cdist)], pch=16, main="Cluster distance")
plot(fm.pca$x[,c(1:2)], col=c("#c6c4c4","#7a7676")[as.factor(distype)], pch=16, main="Equal/Unequal Cluster Distance")
plot(fm.pca$x[,c(1:2)], col=c("#c0ee9b","#6fce23")[as.factor(csize)], pch=16, main="Equal/Unequal Cluster Size")
plot(fm.pca$x[,c(1:2)], col=c("#eec99b", "#e7b270","#df9a44","#ce8223")[as.factor(libsize)], pch=16, main="Library Size")
plot(fm.pca$x[,c(1:2)], col=c("#c99bee","#b270e7","#9a44df","#8223ce")[as.factor(libscale)], pch=16, main="Library Scale")
plot(fm.pca$x[,c(1:2)], col=c("#eea09b","#e77770","#df4d44","#ce2d23")[as.factor(dropout)], pch=16, main="Dropout")

par(mfrow=c(2,3))
plot(fm.tsne$Y[,c(1:2)], col=c("#9bc0ee","#70a5e7","#4489df","#236fce","#9bc0ee","#70a5e7","#4489df","#236fce")[as.factor(cdist)], pch=16, main="Cluster Distance")
plot(fm.tsne$Y[,c(1:2)], col=c("#c6c4c4","#7a7676")[as.factor(distype)], pch=16, main="Equal/Unequal Cluster Distance")
plot(fm.tsne$Y[,c(1:2)], col=c("#c0ee9b","#6fce23")[as.factor(csize)], pch=16, main="Equal/Unequal Cluster Size")
plot(fm.tsne$Y[,c(1:2)], col=c("#eec99b", "#e7b270","#df9a44","#ce8223")[as.factor(libsize)], pch=16, main="Library Size")
plot(fm.tsne$Y[,c(1:2)], col=c("#c99bee","#b270e7","#9a44df","#8223ce")[as.factor(libscale)], pch=16, main="Library Scale")
plot(fm.tsne$Y[,c(1:2)], col=c("#eea09b","#e77770","#df4d44","#ce2d23")[as.factor(dropout)], pch=16, main="Dropout")

par(mfrow=c(2,3))
plot(fm.umap$layout, col=c("#9bc0ee","#70a5e7","#4489df","#236fce","#9bc0ee","#70a5e7","#4489df","#236fce")[as.factor(cdist)], pch=16, main="Cluster Distance")
plot(fm.umap$layout, col=c("#c6c4c4","#7a7676")[as.factor(distype)], pch=16, main="Equal/Unequal Cluster Distance")
plot(fm.umap$layout, col=c("#c0ee9b","#6fce23")[as.factor(csize)], pch=16, main="Equal/Unequal Cluster Size")
plot(fm.umap$layout, col=c("#eec99b", "#e7b270","#df9a44","#ce8223")[as.factor(libsize)], pch=16, main="Library Size")
plot(fm.umap$layout, col=c("#c99bee","#b270e7","#9a44df","#8223ce")[as.factor(libscale)], pch=16, main="Library Scale")
plot(fm.umap$layout, col=c("#eea09b","#e77770","#df4d44","#ce2d23")[as.factor(dropout)], pch=16, main="Dropout")

library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 1.99.5
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## ========================================
## 
## This version is a major update of the package. The major new features are:
## 
## 1. Support splitting heatmaps by columns.
## 2. Support concatenating heatmaps/annotations vertically.
## 3. Provide more types of heatmap annotations.
## 4. Support UpSet plot.
## 
## Note this version is not 100% compatible with the older versions (< 1.99.0).
## Please check by `vignette('difference_to_old_versions', package = 'ComplexHeatmap')`.
## 
## Above messages will be removed in the future.
library(circlize)
## ========================================
## circlize version 0.4.5
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: http://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization 
##   in R. Bioinformatics 2014.
## ========================================

Extract features for each pipelien (gene seletion method, clustering algorithm, k)

meth.gene <-unlist(lapply(colnames(adj.mat.fil), function(x) strsplit(x,"_")[[1]][1]))
meth.algo <-unlist(lapply(colnames(adj.mat.fil), function(x) strsplit(x,"_")[[1]][2]))
meth.k <-unlist(lapply(colnames(adj.mat.fil), function(x) strsplit(x,"_")[[1]][3]))
meth.k[is.na(meth.k)]=4

###. All data + ARI

ha = HeatmapAnnotation(df = data.frame(Distance_Type=distype, dat.features), 
                       col = list(Distance_Type=c("EDA"="#c6c4c4","UDB"="#7a7676"),
                              Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce","UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))

hm = rowAnnotation(df = data.frame(Gene_Selection=meth.gene, K=meth.k),
                       col = list(Gene_Selection=c("HVG"="#23ce82", "HEG"="#5ae3a6", "HDG"="#9beec9"),
                                  K=c("2"="#236fce","3"="#2e7cdc","4"="#5a97e3","5"="#70a5e7","6"="#9bc0ee")))

Heatmap(t(adj.mat.fil), name="ARI",cluster_rows = TRUE, cluster_columns = TRUE, top_annotation = ha,left_annotation = hm, show_row_names = FALSE, show_column_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

orderby <- function(x, by){
   return(x[order(unlist(lapply(x, function(y) strsplit(y,"_")[[1]][by])))])
}

###. All data + f-measure

ha = HeatmapAnnotation(df = data.frame(Distance_Type=distype, dat.features), 
                       col = list(Distance_Type=c("EDA"="#c6c4c4","UDB"="#7a7676"),
                              Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce","UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))

hm = rowAnnotation(df = data.frame(Gene_Selection=meth.gene, K=meth.k),
                       col = list(Gene_Selection=c("HVG"="#23ce82", "HEG"="#5ae3a6", "HDG"="#9beec9"),
                                  K=c("2"="#236fce","3"="#2e7cdc","4"="#5a97e3","5"="#70a5e7","6"="#9bc0ee")))
                                  
Heatmap(t(fm.mat.fil), name="F-measure",cluster_rows = TRUE, cluster_columns = TRUE, top_annotation = ha, left_annotation = hm, show_row_names = FALSE, show_column_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####. Equal distance (512) + HVG + k=4 + ARI

ha = rowAnnotation(df = data.frame(dat.features[1:512,]), 
                       col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HVG",colnames(adj.mat.fil))][,c(1:15)], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####. Equal distance (512) + HVG + k=4 + ARI + Order by Equal/Unequal cluster distance

adj.mat.fil.sub <- adj.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HVG",colnames(adj.mat.fil))][,c(1:15)]
ha = rowAnnotation(df = data.frame(dat.features[1:512,][orderby(rownames(adj.mat.fil.sub),2),]), 
                       col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))


Heatmap(adj.mat.fil.sub[orderby(rownames(adj.mat.fil.sub),2),], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####. Equal distance (512) + HVG + k=4 + ARI + Order by Library size

adj.mat.fil.sub <- adj.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HVG",colnames(adj.mat.fil))][,c(1:15)]
ha = rowAnnotation(df = data.frame(dat.features[1:512,][orderby(rownames(adj.mat.fil.sub),3),]), 
                       col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))


Heatmap(adj.mat.fil.sub[orderby(rownames(adj.mat.fil.sub),3),], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####. Equal distance (512) + HVG + k=4 + ARI + Order by Library scale

adj.mat.fil.sub <- adj.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HVG",colnames(adj.mat.fil))][,c(1:15)]
ha = rowAnnotation(df = data.frame(dat.features[1:512,][orderby(rownames(adj.mat.fil.sub),4),]), 
                       col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))


Heatmap(adj.mat.fil.sub[orderby(rownames(adj.mat.fil.sub),4),], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####. Equal distance (512) + HVG + k=4 + ARI + Order by Dropout

adj.mat.fil.sub <- adj.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HVG",colnames(adj.mat.fil))][,c(1:15)]
ha = rowAnnotation(df = data.frame(dat.features[1:512,][orderby(rownames(adj.mat.fil.sub),5),]), 
                       col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))


Heatmap(adj.mat.fil.sub[orderby(rownames(adj.mat.fil.sub),5),], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####. Equal distance (512) + HVG + k=4 + F-measure

ha = rowAnnotation(df = data.frame(dat.features[1:512,]), 
                       col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HVG",colnames(adj.mat.fil))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####. Equal distance (512) + HEG + k=4 + ARI

ha = rowAnnotation(df = data.frame(dat.features[1:512,]), 
                       col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HEG",colnames(adj.mat.fil))][,c(1:15)], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####. Equal distance (512) + HEG + k=4 + F-measure

ha = rowAnnotation(df = data.frame(dat.features[1:512,]), 
                       col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.mat.fil[grepl('EDA',rownames(fm.mat.fil)),grepl("HEG",colnames(fm.mat.fil))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####. Equal distance (512) + HDG + k=4 + ARI

ha = rowAnnotation(df = data.frame(dat.features[1:512,]), 
                       col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil[grepl('EDA',rownames(adj.mat.fil)),grepl("HDG",colnames(adj.mat.fil))][,c(1:15)], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####. Equal distance (512) + HDG + k=4 + F-measure

ha = rowAnnotation(df = data.frame(dat.features[1:512,]), 
                       col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.mat.fil[grepl('EDA',rownames(fm.mat.fil)),grepl("HDG",colnames(fm.mat.fil))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####.Unequal distance + HVG + k=4 + ARI

ha = rowAnnotation(df = data.frame(dat.features[513:1024,]), 
                       col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil[grepl('UDB',rownames(adj.mat.fil)),grepl("HVG",colnames(adj.mat.fil))][,c(1:15)], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####.Unequal distance + HVG + k=4 + F-measure

ha = rowAnnotation(df = data.frame(dat.features[513:1024,]), 
                       col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.mat.fil[grepl('UDB',rownames(fm.mat.fil)),grepl("HVG",colnames(fm.mat.fil))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####.Unequal distance + HEG + k=4 + ARI

ha = rowAnnotation(df = data.frame(dat.features[513:1024,]), 
                       col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil[grepl('UDB',rownames(adj.mat.fil)),grepl("HEG",colnames(adj.mat.fil))][,c(1:15)], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####.Unequal distance + HEG + k=4 + F-measure

ha = rowAnnotation(df = data.frame(dat.features[513:1024,]), 
                       col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.mat.fil[grepl('UDB',rownames(fm.mat.fil)),grepl("HEG",colnames(fm.mat.fil))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####.Unequal distance + HDG + k=4 + ARI

ha = rowAnnotation(df = data.frame(dat.features[513:1024,]), 
                       col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.mat.fil[grepl('UDB',rownames(adj.mat.fil)),grepl("HDG",colnames(adj.mat.fil))][,c(1:15)], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####.Unequal distance + HDG + k=4 + F-measure

ha = rowAnnotation(df = data.frame(dat.features[513:1024,]), 
                       col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.mat.fil[grepl('UDB',rownames(fm.mat.fil)),grepl("HDG",colnames(fm.mat.fil))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

##. Local performance for clusters with smaller size F-measure can be used to compare the clustering result with truth for a subset of the cells. Here we used F-measure to evaluate the performance of those algorithms only for clusters that have closer distance to each other or have relatively smaller size.

####. Calculate f-measurefor the two clusters with smaller size

fm.smclus.mat <- matrix(0, nrow = 512, ncol = 225)
dats.us <- names(res)[grepl("USB", names(res))]
for(i in c(1:length(dats.us))){
  name = dats.us[i]
  res.mat <- res[[name]]
  tru.vec <- tru[[name]]
  fm.smclus.mat[i,] = apply(res.mat,2,function(x) fmeasure(tru.vec,x,c("Group3","Group4")))
}
colnames(fm.smclus.mat) <- colnames(res[[1]])
rownames(fm.smclus.mat) <- dats.us

####.Unequal Size + Unequal Distance + HVG + k=4 + F-measure for two small clusters

ha = rowAnnotation(df = data.frame(dat.features[dats.us[grepl("UDB", rownames(fm.smclus.mat))],]), 
                       col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.smclus.mat[grepl('UDB',rownames(fm.smclus.mat)) ,grepl("HVG",colnames(fm.smclus.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####.Unequal Size + Equal Distance + HVG + k=4 + F-measure for two small clusters

ha = rowAnnotation(df = data.frame(dat.features[dats.us[grepl("EDA", rownames(fm.smclus.mat))],]), 
                       col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.smclus.mat[grepl('EDA',rownames(fm.smclus.mat)) ,grepl("HVG",colnames(fm.smclus.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####. Calculate f-measurefor the two clusters with larger size

fm.lgclus.mat <- matrix(0, nrow = 512, ncol = 225)
for(i in c(1:length(dats.us))){
  name = dats.us[i]
  res.mat <- res[[name]]
  tru.vec <- tru[[name]]
  fm.lgclus.mat[i,] = apply(res.mat,2,function(x) fmeasure(tru.vec,x,c("Group1","Group2")))
}
colnames(fm.lgclus.mat) <- colnames(res[[1]])
rownames(fm.lgclus.mat) <- dats.us

####.Unequal Size + Unequal Distance + HVG + k=4 + F-measure for two large clusters

ha = rowAnnotation(df = data.frame(dat.features[dats.us[grepl("UDB", rownames(fm.lgclus.mat))],]), 
                       col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.lgclus.mat[grepl('UDB',rownames(fm.lgclus.mat)) ,grepl("HVG",colnames(fm.lgclus.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####.Unequal Size + Unequal Distance + HVG + k=4 + F-measure for two large clusters

ha = rowAnnotation(df = data.frame(dat.features[dats.us[grepl("EDA", rownames(fm.lgclus.mat))],]), 
                       col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.lgclus.mat[grepl('EDA',rownames(fm.lgclus.mat)) ,grepl("HVG",colnames(fm.lgclus.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

##. Local performance for clusters that are closer to each other #### Calculate the f-measure for clusters that are cluster to each other (Group 3,4) or far from each other (Group 1, 2)

fm.undist.mat <- matrix(0, nrow = 512, ncol = 225)
dats.ud <- names(res)[grepl("UDB", names(res))]
for(i in c(1:length(dats.ud))){
  name = dats.us[i]
  res.mat <- res[[name]]
  tru.vec <- tru[[name]]
  fm.undist.mat[i,] = apply(res.mat,2,function(x) fmeasure(tru.vec,x,c("Group3","Group4")))
}
colnames(fm.undist.mat) <- colnames(res[[1]])
rownames(fm.undist.mat) <- dats.ud

fm.eqdist.mat <- matrix(0, nrow = 512, ncol = 225)
for(i in c(1:length(dats.ud))){
  name = dats.us[i]
  res.mat <- res[[name]]
  tru.vec <- tru[[name]]
  fm.eqdist.mat[i,] = apply(res.mat,2,function(x) fmeasure(tru.vec,x,c("Group1","Group2")))
}
colnames(fm.eqdist.mat) <- colnames(res[[1]])
rownames(fm.eqdist.mat) <- dats.ud

####.Equal Size + Unequal Distance + HVG + k=4 + F-measure for two closer clusters

ha = rowAnnotation(df = data.frame(dat.features[dats.ud[grepl("ESA", rownames(fm.undist.mat))],]), 
                       col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.undist.mat[grepl('ESA',rownames(fm.undist.mat)) ,grepl("HVG",colnames(fm.undist.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####.Equal Size + Unequal Distance + HVG + k=4 + F-measure for two farer clusters

ha = rowAnnotation(df = data.frame(dat.features[dats.ud[grepl("ESA", rownames(fm.eqdist.mat))],]), 
                       col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.eqdist.mat[grepl('ESA',rownames(fm.eqdist.mat)) ,grepl("HVG",colnames(fm.eqdist.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####.Unequal Size + Unequal Distance + HVG + k=4 + F-measure for two closer clusters

ha = rowAnnotation(df = data.frame(dat.features[dats.ud[grepl("USB", rownames(fm.undist.mat))],]), 
                       col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.undist.mat[grepl('USB',rownames(fm.undist.mat)) ,grepl("HVG",colnames(fm.undist.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####.Unequal Size + Unequal Distance + HVG + k=4 + F-measure for two farer clusters

ha = rowAnnotation(df = data.frame(dat.features[dats.ud[grepl("USB", rownames(fm.eqdist.mat))],]), 
                       col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(fm.eqdist.mat[grepl('USB',rownames(fm.eqdist.mat)) ,grepl("HVG",colnames(fm.eqdist.mat))][,c(1:15)], name="F-measure",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

##. Maximum ARI for each data (Best performance for each method but may not necessarily have k=4)

gsms <- c("HVG","HDG", "HEG")
adj.bestp.mat <- matrix(0, nrow=1024, ncol=45)
adj.bestp.k.mat <- matrix(0,nrow=1024, ncol=45)
rownames(adj.bestp.mat) <- rownames(adj.mat.fil)
colnames(adj.bestp.mat) <- unique(unlist(lapply(strsplit(colnames(adj.mat.fil),"_"), function(x) paste(x[1],x[2],sep = "_"))))
rownames(adj.bestp.k.mat) = rownames(adj.bestp.mat)
colnames(adj.bestp.k.mat) = colnames(adj.bestp.mat)
clums <- unique(unlist(lapply(strsplit(colnames(adj.mat.fil),"_"), function(x) x[2])))
for(i in rownames(adj.mat.fil)){
  for(j in colnames(adj.bestp.mat)){
    tmp <- adj.mat.fil[i, grepl(j, colnames(adj.mat.fil))]
    adj.bestp.mat[i,j] = max(tmp)
    if(is.na(strsplit(names(which.max(tmp)),"_")[[1]][3])){
      adj.bestp.k.mat[i,j] = 4
    }else{
      adj.bestp.k.mat[i,j] = as.numeric(strsplit(names(which.max(tmp)),"_")[[1]][3])
    }
  }
}
ha = HeatmapAnnotation(df = data.frame(Distance_Type=distype, dat.features), 
                       col = list(Distance_Type=c("EDA"="#c6c4c4","UDB"="#7a7676"),
                              Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce","UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))

#hm = rowAnnotation(df = data.frame(Gene_Selection=meth.gene, K=meth.k), col = list(Gene_Selection=c("HVG"="#23ce82", "HEG"="#5ae3a6", "HDG"="#9beec9"), K=c("2"="#236fce","3"="#2e7cdc","4"="#5a97e3","5"="#70a5e7","6"="#9bc0ee")))

Heatmap(t(adj.bestp.mat), name="ARI",cluster_rows = TRUE, cluster_columns = TRUE, top_annotation = ha, show_row_names = FALSE, show_column_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####. Equal distance (512) + HVG + k=4 + ARI + best performance

ha = rowAnnotation(df = data.frame(dat.features[1:512,]), 
                       col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.bestp.mat[grepl('EDA',rownames(adj.bestp.mat)),grepl("HVG",colnames(adj.bestp.mat))], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####. Equal distance (512) + HVG + k=4 + ARI + best performance

ha = rowAnnotation(df = data.frame(dat.features[1:512,]), 
                       col = list(Cluster_Distance=c("EDA1"="#9bc0ee","EDA2"="#70a5e7","EDA3"="#4489df","EDA4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.bestp.k.mat[grepl('EDA',rownames(adj.bestp.k.mat)),grepl("HVG",colnames(adj.bestp.k.mat))], name="k",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(2, 4, 6), c("blue", "grey", "red")))

####. Equal distance (512) + HVG + k=4 + ARI + best performance

ha = rowAnnotation(df = data.frame(dat.features[513:1024,]), 
                       col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.bestp.mat[grepl('UDB',rownames(adj.bestp.mat)),grepl("HVG",colnames(adj.bestp.mat))], name="ARI",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")))

####. Equal distance (512) + HVG + k=4 + ARI + best performance

ha = rowAnnotation(df = data.frame(dat.features[513:1024,]), 
                       col = list(Cluster_Distance=c("UDB1"="#9bc0ee","UDB2"="#70a5e7","UDB3"="#4489df","UDB4"="#236fce"),
                                  Cluster_Size=c("ESA"="#c0ee9b","USB"="#6fce23"),
                                  Library_Size=c("LS1"="#eec99b", "LS2"="#e7b270","LS3"="#df9a44","LS4"="#ce8223"), 
                                  Library_Scale= c("LC1"="#c99bee","LC2"="#b270e7","LC3"="#9a44df","LC4"="#8223ce"),
                                  Dropout=c("DO1"="#eea09b","DO2"="#e77770","DO3"="#df4d44","DO4"="#ce2d23")))
Heatmap(adj.bestp.k.mat[grepl('UDB',rownames(adj.bestp.k.mat)),grepl("HVG",colnames(adj.bestp.k.mat))], name="k",cluster_rows = FALSE, right_annotation = ha, show_row_names = FALSE, col = colorRamp2(c(2, 4, 6), c("blue", "grey", "red")))